# -------------------------------------------------------------------
# Purpose: Creates Figure B4
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate all panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(poutputappendix))


## Load data
load(file.path(pdataanalysis, "countyLevel18501940.RData"))
d <- copy(countyLevel18501940)

## Define regions
d[, div := "Northeast"]
d[between(region, 3, 4), div := "Midwest"]
d[between(region, 5, 7), div := "South"]
d[between(region, 8, 9), div := "West"]

# Plot for surname diversity (entropy_namelast_mp_adjp)
d1 <- d[, .(entropy_namelast_mp_adjp = mean(entropy_namelast_mp_adjp)), by = .(year, div)]
plotname <- file.path(poutputappendix, "figureB04a.pdf")
p1 <- d1 %>% ggplot(aes(x = year, y = entropy_namelast_mp_adjp, color = factor(div))) +
        geom_point() +
        geom_line() +
        scale_x_continuous(breaks = seq(1850, 1940, 10)) +
        scale_color_brewer(palette = "Set1", name = NULL) +
        labs(y = NULL, subtitle = "Surname diversity", x = "Year") +
        theme_minimal() +
        theme(legend.position = "bottom")
ggsave(plotname, p1, width = 4.6, height = 4.6)
cat("Figure saved to:", plotname, "\n")


# Plot for country of origin diversity (entropy_coo_adjp)
d2 <- d[, .(entropy_coo_adjp = mean(entropy_coo_adjp)), by = .(year, div)]
plotname <- file.path(poutputappendix, "figureB04b.pdf")
p2 <- d2 %>% ggplot(aes(x = year, y = entropy_coo_adjp, color = factor(div))) +
        geom_point() +
        geom_line() +
        scale_x_continuous(breaks = seq(1850, 1940, 10)) +
        scale_color_brewer(palette = "Set1", name = NULL) +
        labs(y = NULL, subtitle = "Ancestral-country diversity", x = "Year") +
        theme_minimal() +
        theme(legend.position = "bottom")
ggsave(plotname, p2, width = 4.6, height = 4.6)
cat("Figure saved to:", plotname, "\n")


# Plot for country of birth diversity (entropy_cob_adjp)
d3 <- d[, .(entropy_cob_adjp = mean(entropy_cob_adjp)), by = .(year, div)]
plotname <- file.path(poutputappendix, "figureB04c.pdf")
p3 <- d3 %>% ggplot(aes(x = year, y = entropy_cob_adjp, color = factor(div))) +
        geom_point() +
        geom_line() +
        scale_x_continuous(breaks = seq(1850, 1940, 10)) +
        scale_color_brewer(palette = "Set1", name = NULL) +
        labs(y = NULL, subtitle = "Birth-country diversity", x = "Year") +
        theme_minimal() +
        theme(legend.position = "bottom")
ggsave(plotname, p3, width = 4.6, height = 4.6)
cat("Figure saved to:", plotname, "\n")


# Plot for occupation diversity (entropy_occ1950_adjp)
d4 <- d[, .(entropy_occ1950_adjp = mean(entropy_occ1950_adjp, na.rm = TRUE)), by = .(year, div)]
plotname <- file.path(poutputappendix, "figureB04d.pdf")
p4 <- d4 %>% ggplot(aes(x = year, y = entropy_occ1950_adjp, color = factor(div))) +
        geom_point() +
        geom_line() +
        scale_x_continuous(breaks = seq(1850, 1940, 10)) +
        scale_color_brewer(palette = "Set1", name = NULL) +
        labs(y = NULL, subtitle = "Occupational diversity", x = "Year") +
        theme_minimal() +
        theme(legend.position = "bottom")
ggsave(plotname, p4, width = 4.6, height = 4.6)
cat("Figure saved to:", plotname, "\n")

